#!/usr/bin/env perl
use 5.010;
use strict;
use warnings;

use Pod::Usage ();
use Getopt::Long ();

BEGIN {
    eval "require MediaWiki::API; require YAML::XS;" or do {
        print "You have to install some modules via CPAN to run this:\n";
        print "   sudo cpanp MediaWiki::API YAML::XS\n";
        exit 1;
    };
}

use MediaWiki::API;
use YAML::XS qw(Dump);
use Test::More 'no_plan';

=head1 NAME

update-wiki-pages - Scrape the wiki for key/value wiki description pages

=head1 SYNOPSIS

    perl script/misc/update-wiki-pages config/wiki_pages.yml 

Or with prove(1):

    prove -e 'perl script/misc/update-wiki-pages' config/wiki_pages.yml

=cut

# Get the command-line options
Getopt::Long::Parser->new(
    config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
)->getoptions(
    'h|help' => \my $help,
) or help();

# On --help
help() if $help;

my $out_file = $ARGV[0];
$out_file //= 'config/wiki_pages.yml';

help() unless -f $out_file;

# Get a API interface
my $mw = MediaWiki::API->new();
ok($mw, "Got a MediaWiki API");
$mw->{config}->{api_url} = 'http://wiki.openstreetmap.org/w/api.php';

# All our goodies
my (%feature, %count);

# This is what you get on:
## http://wiki.openstreetmap.org/w/index.php?search=Template:KeyDescription&fulltext=Search&fulltext=Search
for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) {
    ok(1, "  Templates for language '$lang'");

    # Key pages
    ok(1, "    Getting key pages");
    my $cnt = stick_content_in_hash("key", "Template:${lang}KeyDescription", \%feature);
    $cnt += stick_content_in_hash("key", "Template:${lang}Feature", \%feature);
    ok(1, "    Got $cnt key pages");
    $count{key} += $cnt;

    # Value pages
    ok(1, "    Getting value pages");
    $cnt = stick_content_in_hash("tag", "Template:${lang}ValueDescription", \%feature);
    ok(1, "    Got $cnt value pages");
    $count{value} += $cnt;
}

ok(1, "Got a total of $count{$_} ${_}s") for qw[ key value ];

# Dump to .yml file
open my $out, ">", $out_file or die "Can't open file '$out_file' supplied on the command line";
say $out "# THIS FILE IS AUTOGENERATED WITH THE script/misc/update-wiki-pages";
say $out "# PROGRAM DO NOT MANUALLY EDIT IT";
say $out "";
say $out Dump(\%feature);
close $out;

exit 0;

sub stick_content_in_hash
{
    my ($key, $title, $hash) = @_;
    my $ukey = ucfirst $key;

    my $space_to_underscore = sub {
        my $txt = shift;
        $txt =~ s/ /_/g;
        $txt;
    };

    my $count = 0;

    my $process_link = sub {
        my $link = shift;
        $count++;
        ok(1, "    ... got $count links") if $count % 200 == 0;
        my $title = $link->{title};
        my $lang;
        my $key_name;
        if ($title =~ /^$ukey:(?<key_name>.*?)$/) {
            # English by default
            $lang = "en";
            $key_name = $space_to_underscore->($+{key_name});
        } elsif ($title =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) {
            $lang = lc $+{lang};
            $key_name = $space_to_underscore->($+{key_name});
        }
        if ($lang && !exists($hash->{$lang}->{$key}->{$key_name})) {
            $hash->{$lang}->{$key}->{$key_name} = $title;
        }
    };

    get_embeddedin(
        $title,
        sub {
            my $link = shift;
            $process_link->($link);
            get_redirects(
                $link->{title},
                sub {
                    my $link = shift;
                    $process_link->($link) if exists($link->{redirect});
                }
            );
        }
    );

    return $count;
}

sub process_list
{
    my $callback = shift;
    my $links = shift;
    for my $link (@$links) {
        $callback->($link);
    }
}

sub get_embeddedin
{
    my ($title, $callback) = @_;
    my $articles = $mw->list(
        {
            action => 'query',
            list => 'embeddedin',
            eititle => $title,
            eifilterredir => 'nonredirects',
            # Doesn't work for De:* and anything non-en. Odd.
            # einamespace => '0|8',
            eilimit => '200',
        },
        {
            max => '0',
            hook => sub { process_list($callback, @_) },
            skip_encoding => 1,
        }
    ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}

sub get_redirects
{
    my ($title, $callback) = @_;
    my $articles = $mw->list(
        {
            action => 'query',
            list => 'backlinks',
            bltitle => $title,
            blfilterredir => 'redirects',
            # Doesn't work for De:* and anything non-en. Odd.
            # einamespace => '0|8',
            bllimit => '200',
        },
        {
            max => '0',
            hook => sub { process_list($callback, @_) },
            skip_encoding => 1,
        }
    ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}

sub help
{
    my %arg = @_;

    Pod::Usage::pod2usage(
        -verbose => $arg{ verbose },
        -exitval => $arg{ exitval } || 0,
    );
}
